{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "original_prica = [1, 5, 8, 9, 10, 17, 17, 20, 24, 30, 35]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "price = defaultdict(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i,p in enumerate(original_prica):\n",
    "    price[i+1] = p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "35"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "price[11]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the max splitting by enumerate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int,\n",
       "            {1: 1,\n",
       "             2: 5,\n",
       "             3: 8,\n",
       "             4: 9,\n",
       "             5: 10,\n",
       "             6: 17,\n",
       "             7: 17,\n",
       "             8: 20,\n",
       "             9: 24,\n",
       "             10: 30,\n",
       "             11: 35})"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "def r(n):\n",
    "    \n",
    "    return max(\n",
    "        [price[n]] + [r(i) + r(n-i) for i in range(1,n)]\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "30"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "45"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r(15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@get_time\n",
    "def fibonacci(n):\n",
    "    if n  <= 2:\n",
    "        return 1\n",
    "    else:\n",
    "        return fibonacci(n-1) + fibonacci(n-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5702887\n",
      "1.556218147277832\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "print(fibonacci(34))\n",
    "end = time.time()\n",
    "print(end-start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 241,
   "metadata": {},
   "outputs": [],
   "source": [
    "mem = defaultdict()\n",
    "def fibonacci_op(n):\n",
    "    if n in mem:\n",
    "        return mem[n]\n",
    "    else: \n",
    "        if n <= 2:\n",
    "            mem[n] = 1\n",
    "            return n\n",
    "        else:\n",
    "            result = fibonacci_op(n-1) + fibonacci_op(n-2)\n",
    "            mem[n] = result\n",
    "            return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14662949395604\n",
      "0.0003120899200439453\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "print(fibonacci_op(64))\n",
    "end = time.time()\n",
    "print(end-start)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analysis: How to optimize"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A Simpler Problem"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Decorator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_time(func):\n",
    "    def wrapper(*args):\n",
    "        start = time.time()\n",
    "        func(*args)\n",
    "        end = time.time()\n",
    "        print('used time : {}'.format(end-start))\n",
    "    return wrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
    "def f1(func):\n",
    "    def wrapper(*args,**kwargs):\n",
    "        print('Started')\n",
    "        func(*args,**kwargs)\n",
    "        print('Ended')\n",
    "    return wrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "metadata": {},
   "outputs": [],
   "source": [
    "def f():\n",
    "    print('HELLO')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "f = f1(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "wrapper\n"
     ]
    }
   ],
   "source": [
    "print(f.__name__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "@f1\n",
    "def g(a):\n",
    "    print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Started\n",
      "hello\n",
      "Ended\n"
     ]
    }
   ],
   "source": [
    "g('hello')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [],
   "source": [
    "def k(*arg,**kwargs):\n",
    "    print(kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'b': 5}\n"
     ]
    }
   ],
   "source": [
    "k(6,b=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "from functools import wraps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "def memo(f):\n",
    "    memo.already_computed = {}\n",
    "    @wraps(f)\n",
    "    def _wrap(arg):\n",
    "        if arg in memo.already_computed:\n",
    "            result = memo.already_computed[arg]\n",
    "        else:\n",
    "            result = f(arg)\n",
    "            memo.already_computed[arg] = result\n",
    "        return result\n",
    "    return _wrap"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# We use this method to solve Cut Rod probelm¶"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "solution = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "@memo\n",
    "def r(n):\n",
    "    \"\"\"\n",
    "    Args: n is the iron length\n",
    "    Return: the max revenue \n",
    "    \"\"\"\n",
    "    max_price, max_split = max(\n",
    "        [(price[n], 0)] + [(r(i) + r(n-i), i) for i in range(1, n)], key=lambda x: x[0]\n",
    "    )\n",
    "\n",
    "    solution[n] = (n - max_split, max_split)\n",
    "    \n",
    "    return max_price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "60"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int,\n",
       "            {1: 1,\n",
       "             2: 5,\n",
       "             3: 8,\n",
       "             4: 9,\n",
       "             5: 10,\n",
       "             6: 17,\n",
       "             7: 17,\n",
       "             8: 20,\n",
       "             9: 24,\n",
       "             10: 30,\n",
       "             11: 35,\n",
       "             15: 0,\n",
       "             14: 0,\n",
       "             13: 0,\n",
       "             12: 0,\n",
       "             20: 0,\n",
       "             19: 0,\n",
       "             18: 0,\n",
       "             17: 0,\n",
       "             16: 0})"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: (1, 0),\n",
       " 2: (2, 0),\n",
       " 3: (3, 0),\n",
       " 4: (2, 2),\n",
       " 5: (3, 2),\n",
       " 6: (6, 0),\n",
       " 7: (6, 1),\n",
       " 8: (6, 2),\n",
       " 9: (6, 3),\n",
       " 10: (10, 0),\n",
       " 11: (11, 0),\n",
       " 12: (11, 1),\n",
       " 13: (11, 2),\n",
       " 14: (11, 3),\n",
       " 15: (13, 2),\n",
       " 16: (14, 2),\n",
       " 17: (11, 6),\n",
       " 18: (17, 1),\n",
       " 19: (17, 2),\n",
       " 20: (17, 3)}"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "solution"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# How do we parse solution?¶"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_solution(n):\n",
    "    left_split, right_split = solution[n]\n",
    "    \n",
    "    if right_split == 0: return [left_split]\n",
    "    \n",
    "    return parse_solution(left_split) + parse_solution(right_split)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "75"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r(24)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[11, 6, 3]"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parse_solution(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Edit Distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "solution = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from functools import lru_cache"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "@lru_cache(maxsize=2**10)\n",
    "def edit_distance(string1, string2):\n",
    "    \n",
    "    if len(string1) == 0: return len(string2)\n",
    "    if len(string2) == 0: return len(string1)\n",
    "    \n",
    "    tail_s1 = string1[-1]\n",
    "    tail_s2 = string2[-1]\n",
    "    \n",
    "    candidates = [\n",
    "        (edit_distance(string1[:-1], string2) + 1, 'DEL {}'.format(tail_s1)),  \n",
    "        # string 1 delete tail\n",
    "        (edit_distance(string1, string2[:-1]) + 1, 'ADD {}'.format(tail_s2)),  \n",
    "        # string 1 add tail of string2\n",
    "    ]\n",
    "    \n",
    "    if tail_s1 == tail_s2:\n",
    "        both_forward = (edit_distance(string1[:-1], string2[:-1]) + 0, '')\n",
    "    else:\n",
    "        both_forward = (edit_distance(string1[:-1], string2[:-1]) + 1, 'SUB {} => {}'.format(tail_s1, tail_s2))\n",
    "\n",
    "    candidates.append(both_forward)\n",
    "    \n",
    "    min_distance, operation = min(candidates, key=lambda x: x[0])\n",
    "    \n",
    "    solution[(string1, string2)] = operation \n",
    "    \n",
    "    return min_distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "edit_distance('ABCDECG','ABCCEF')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{('A', 'A'): '',\n",
       " ('A', 'AB'): 'ADD B',\n",
       " ('A', 'ABC'): 'ADD C',\n",
       " ('A', 'ABCC'): 'ADD C',\n",
       " ('A', 'ABCCE'): 'ADD E',\n",
       " ('A', 'ABCCEF'): 'ADD F',\n",
       " ('AB', 'A'): 'DEL B',\n",
       " ('AB', 'AB'): '',\n",
       " ('AB', 'ABC'): 'ADD C',\n",
       " ('AB', 'ABCC'): 'ADD C',\n",
       " ('AB', 'ABCCE'): 'ADD E',\n",
       " ('AB', 'ABCCEF'): 'ADD F',\n",
       " ('ABC', 'A'): 'DEL C',\n",
       " ('ABC', 'AB'): 'DEL C',\n",
       " ('ABC', 'ABC'): '',\n",
       " ('ABC', 'ABCC'): 'ADD C',\n",
       " ('ABC', 'ABCCE'): 'ADD E',\n",
       " ('ABC', 'ABCCEF'): 'ADD F',\n",
       " ('ABCD', 'A'): 'DEL D',\n",
       " ('ABCD', 'AB'): 'DEL D',\n",
       " ('ABCD', 'ABC'): 'DEL D',\n",
       " ('ABCD', 'ABCC'): 'SUB D => C',\n",
       " ('ABCD', 'ABCCE'): 'ADD E',\n",
       " ('ABCD', 'ABCCEF'): 'ADD F',\n",
       " ('ABCDE', 'A'): 'DEL E',\n",
       " ('ABCDE', 'AB'): 'DEL E',\n",
       " ('ABCDE', 'ABC'): 'DEL E',\n",
       " ('ABCDE', 'ABCC'): 'DEL E',\n",
       " ('ABCDE', 'ABCCE'): '',\n",
       " ('ABCDE', 'ABCCEF'): 'ADD F',\n",
       " ('ABCDEC', 'A'): 'DEL C',\n",
       " ('ABCDEC', 'AB'): 'DEL C',\n",
       " ('ABCDEC', 'ABC'): 'DEL C',\n",
       " ('ABCDEC', 'ABCC'): '',\n",
       " ('ABCDEC', 'ABCCE'): 'DEL C',\n",
       " ('ABCDEC', 'ABCCEF'): 'SUB C => F',\n",
       " ('ABCDECG', 'A'): 'DEL G',\n",
       " ('ABCDECG', 'AB'): 'DEL G',\n",
       " ('ABCDECG', 'ABC'): 'DEL G',\n",
       " ('ABCDECG', 'ABCC'): 'DEL G',\n",
       " ('ABCDECG', 'ABCCE'): 'DEL G',\n",
       " ('ABCDECG', 'ABCCEF'): 'DEL G'}"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "solution"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Todo: Parse Solution is our homework"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Problem Case 3: Pinyin Auto Correction Problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "chinese_dataset = 'article_9k.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "CHINESE_CHARATERS = open(chinese_dataset).read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'此外自本周6月12日起除小米手机6等15款机型外其余机型已暂停更新发布含开发版体'"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "CHINESE_CHARATERS[:40]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pinyin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ni hao ， zhong guo'"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pinyin.get('你好，中国',format='strip',delimiter=' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "def chinese_to_pinyin(character):\n",
    "    return pinyin.get(character, format='strip', delimiter=' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "CHINESE_CHARATERS_COPYS = chinese_to_pinyin(CHINESE_CHARATERS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "129433034"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(CHINESE_CHARATERS_COPYS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokens(text):\n",
    "    \"List all the pinyin characters\"\n",
    "    return re.findall('[a-z]+',text.lower())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ci wai zi ben zhou 6 yue 1 2 ri qi chu xiao mi shou ji 6 deng 1 5 kuan ji xing wai qi yu ji xing yi '"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "CHINESE_CHARATERS_COPYS[:100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ci',\n",
       " 'wai',\n",
       " 'zi',\n",
       " 'ben',\n",
       " 'zhou',\n",
       " 'yue',\n",
       " 'ri',\n",
       " 'qi',\n",
       " 'chu',\n",
       " 'xiao',\n",
       " 'mi',\n",
       " 'shou',\n",
       " 'ji',\n",
       " 'deng',\n",
       " 'kuan',\n",
       " 'ji',\n",
       " 'xing',\n",
       " 'wai',\n",
       " 'qi',\n",
       " 'yu',\n",
       " 'ji',\n",
       " 'xing',\n",
       " 'yi']"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokens(CHINESE_CHARATERS_COPYS[:100])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter, defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "PINYIN_COUNT = Counter(tokens(CHINESE_CHARATERS_COPYS))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "def correct(word):\n",
    "    'Find the most possible pinyin based on edit distance'\n",
    "    # Prefer edit distance 0, then 1, then 2; otherwist default to word itself\n",
    "    candidates = (known(edits0(word)) or\n",
    "                  known(edits1(word)) or\n",
    "                  known(edits2(word)) or\n",
    "                  [word])\n",
    "    return max(candidates,key=PINYIN_COUNT.get)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "def known(words):\n",
    "    'Return the pinyin in our data'\n",
    "    return {w for w in words if w in PINYIN_COUNT}\n",
    "\n",
    "def edits0(word):\n",
    "    'Return all strings that are zero edits away from word (i.e., just word itself).'\n",
    "    return {word}\n",
    "\n",
    "def edits2(word):\n",
    "    'Return all strings that are two edits away from this pinyin.'\n",
    "    return {e2 for e1 in edits1(word) for e2 in edits1(e1)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "alphabet = 'abcdefghijklmnopqrstuvwxyz'\n",
    "\n",
    "def splits(word):\n",
    "    'Return a list of all possible (first, rest) pairs that comprise pinyin.'\n",
    "    return [(word[:i], word[i:])\n",
    "           for i in range(len(word)+1)]\n",
    "\n",
    "def edits1(word):\n",
    "    'Return all strings that are one edit away from this pinyin.'\n",
    "    pairs = splits(word)\n",
    "    deletes = [a+b[1:] for (a,b) in pairs if b]\n",
    "    transposes = [a+b[1]+b[0]+b[2:] for (a,b) in pairs if len(b) > 1]\n",
    "    replaces = [a+c+b[1:] for (a,b) in pairs for c in alphabet if b]\n",
    "    inserts = [a+c+b for (a,b) in pairs for c in alphabet]\n",
    "    return set(deletes + transposes + replaces + inserts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('', 'pinyin'),\n",
       " ('p', 'inyin'),\n",
       " ('pi', 'nyin'),\n",
       " ('pin', 'yin'),\n",
       " ('piny', 'in'),\n",
       " ('pinyi', 'n'),\n",
       " ('pinyin', '')]"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "splits('pinyin')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'pinyin'}\n"
     ]
    }
   ],
   "source": [
    "print(edits0('pinyin'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'piwyin', 'winyin', 'pinyon', 'pinpin', 'pinyia', 'pwinyin', 'piqyin', 'pinlyin', 'pinyino', 'pinyxn', 'pinwyin', 'pinyio', 'pinayin', 'pinyikn', 'pincin', 'pinyiy', 'pinyinc', 'jinyin', 'pinyien', 'pinyi', 'pinyine', 'pinyqn', 'pifnyin', 'pinbyin', 'pindyin', 'xinyin', 'pinynin', 'pinyzn', 'pinyis', 'pinyimn', 'pinyih', 'ppnyin', 'pisyin', 'pvinyin', 'tinyin', 'qinyin', 'pinyifn', 'pinyik', 'pinwin', 'pinzyin', 'yinyin', 'pinysn', 'pinyini', 'pinyjin', 'pinrin', 'pinyinx', 'pinnin', 'piniyin', 'pzinyin', 'pinfyin', 'pinygin', 'pinuin', 'puinyin', 'ponyin', 'piznyin', 'kinyin', 'piyyin', 'cpinyin', 'vinyin', 'pidyin', 'pinyitn', 'pinyink', 'pinyvin', 'pinydin', 'pnnyin', 'ninyin', 'qpinyin', 'pinxin', 'pinyins', 'jpinyin', 'piiyin', 'pinycn', 'ginyin', 'bpinyin', 'pinryin', 'pinqin', 'pbnyin', 'ypinyin', 'pifyin', 'ipnyin', 'pinyinf', 'pineyin', 'pinyiw', 'pinymn', 'psinyin', 'pinqyin', 'pinyic', 'pijnyin', 'pinyirn', 'pingyin', 'pinyan', 'cinyin', 'pinypin', 'pinyrin', 'ptinyin', 'gpinyin', 'pznyin', 'pinyinr', 'pjinyin', 'pqnyin', 'pintyin', 'pqinyin', 'pinyinb', 'pinyinp', 'pinyiv', 'npinyin', 'pdinyin', 'pinyijn', 'pwnyin', 'pinyxin', 'pinoyin', 'pinyn', 'pihnyin', 'pinyoin', 'xpinyin', 'pinyie', 'pinyibn', 'pkinyin', 'pinyinq', 'piniin', 'pinyil', 'piyin', 'pinyind', 'linyin', 'pixyin', 'pinyinw', 'pibyin', 'pijyin', 'ppinyin', 'pinyinm', 'pdnyin', 'ptnyin', 'pindin', 'pyinyin', 'pintin', 'pknyin', 'pibnyin', 'pxinyin', 'pinyicn', 'pimnyin', 'dinyin', 'piayin', 'pinyinh', 'epinyin', 'pinvyin', 'pinsyin', 'pinyinv', 'pinyun', 'pinyion', 'pinjin', 'pinyyin', 'pinyisn', 'pinyiu', 'pinmyin', 'pinein', 'pinoin', 'pmnyin', 'hinyin', 'pinuyin', 'pixnyin', 'punyin', 'upinyin', 'pinhin', 'hpinyin', 'pingin', 'pinyign', 'pinygn', 'rpinyin', 'einyin', 'pinyiun', 'ipinyin', 'pinyiq', 'panyin', 'pianyin', 'rinyin', 'pcnyin', 'pfnyin', 'pinyvn', 'pvnyin', 'zpinyin', 'pinysin', 'pniyin', 'pinytn', 'pinyim', 'pinydn', 'pinnyin', 'pninyin', 'pinyii', 'pinyhin', 'pinpyin', 'pinyiz', 'pinkin', 'piniyn', 'pigyin', 'pieyin', 'pinyixn', 'pioyin', 'plinyin', 'psnyin', 'pinybn', 'tpinyin', 'pinykn', 'pinbin', 'pinybin', 'pinyqin', 'spinyin', 'pynyin', 'piuyin', 'pinyian', 'inyin', 'piwnyin', 'uinyin', 'pbinyin', 'pincyin', 'pimyin', 'pinvin', 'wpinyin', 'pipnyin', 'binyin', 'pinytin', 'pinyfin', 'pinzin', 'pinyhn', 'piynin', 'pinyip', 'plnyin', 'pinyen', 'penyin', 'phinyin', 'pgnyin', 'vpinyin', 'pinyiqn', 'pinyir', 'pinyihn', 'kpinyin', 'sinyin', 'finyin', 'pinyuin', 'pilnyin', 'pinyinl', 'lpinyin', 'piunyin', 'pjnyin', 'pinyij', 'pinhyin', 'pinying', 'pinyzin', 'pcinyin', 'iinyin', 'opinyin', 'peinyin', 'pinyina', 'pinyipn', 'piqnyin', 'prinyin', 'pnyin', 'pinywn', 'pilyin', 'piinyin', 'pisnyin', 'pinywin', 'pihyin', 'pidnyin', 'pirnyin', 'pinyjn', 'pikyin', 'pinyinn', 'pfinyin', 'pinylin', 'zinyin', 'pinyid', 'pinlin', 'pinyif', 'pinyain', 'pinynn', 'prnyin', 'pxnyin', 'pinyiwn', 'pinin', 'pinyib', 'pienyin', 'pignyin', 'pinyrn', 'pinyni', 'pinyit', 'pminyin', 'pinyizn', 'pinyiln', 'pginyin', 'pinkyin', 'phnyin', 'mpinyin', 'pinyint', 'pinyiny', 'pinfin', 'pinyin', 'poinyin', 'pinain', 'pivyin', 'pitnyin', 'pinyln', 'pinyinz', 'pinyein', 'pinyyn', 'pinycin', 'pinymin', 'ainyin', 'pinmin', 'dpinyin', 'pinyig', 'picyin', 'pinjyin', 'piynyin', 'pinyix', 'fpinyin', 'pinyidn', 'pipyin', 'painyin', 'pinyiin', 'pinykin', 'pinypn', 'pivnyin', 'pinyivn', 'oinyin', 'piknyin', 'pityin', 'pinyfn', 'apinyin', 'pinyiyn', 'pizyin', 'pinsin', 'minyin', 'pinyinu', 'piryin', 'pinyinj', 'pionyin', 'picnyin', 'pinxyin'}\n"
     ]
    }
   ],
   "source": [
    "print(edits1('pinyin'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'yin'"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct('yin')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ying'"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct('yign')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ying'"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct('yinn')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "def correct_sequence_pinyin(text_pinyin):\n",
    "    return ' '.join(map(correct, text_pinyin.split()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'zhe shi yi ge ce shi'"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct_sequence_pinyin('zhe sih yi ge ce sho')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'wo xiang shang qing hua da xue'"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct_sequence_pinyin('wo xiang shagn qinng hua da xue')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 思考题-homework？    \n",
    "#### 如何在不带空格的时候完成自动修整？--> 如何完成拼音的自动分割？   \n",
    "###### 提示：使用第一节课提到的语言模型!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "woyaoshangqinghua\n",
    "w yaoshangqinghua\n",
    "wo yaoshangqinghua\n",
    "woyao shangqinghua\n",
    "\n",
    "-> DP"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
